Libraries

library(clusterProfiler)
library(org.Mm.eg.db)
library(ggplot2)
library(gprofiler2)

Seperate genes based on different patterns

Input for run_select_pattern.sh: RSEM result (GeneMat_HFD_Lingon_LDF.Ebseqresults/GeneMat_HFD_Lingon_LDF.Ebseqresults_FDR_0.05.tab)

Output: Gene list with EnsemblID and GeneSymbol, classified as diffent patterns (Output folder: Matrix/pattern)

#seperate genes based on different patterns
scr/run_select_pattern.sh
#full list of expressed genes
cut -f 1 Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults | \
   sed 's/_/\t/;s/"//g' | \
   tail -n +2 \
   > Matrix/full_gene_list2.csv

Load data

pattern2.FDR0.05 <- read.table("Matrix/pattern/pattern2_FDR0.05.csv",
                               header = TRUE)
pattern3.FDR0.05 <- read.table("Matrix/pattern/pattern3_FDR0.05.csv",
                               header = TRUE)
pattern4.FDR0.05 <- read.table("Matrix/pattern/pattern4_FDR0.05.csv",
                               header = TRUE)
pattern_table <- read.table(gzfile("Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults.pattern.gz"),
                            header = TRUE)
full_gene <- read.table("Matrix/full_gene_list.csv", header = FALSE)
names(full_gene) <- c("EnsemblID","GeneSymbol")

RSEM result summary

Summarize the number of genes classified to each pattern (FDR cutoff = 0.05)

names(pattern_table) <- c("HDF","Lingon","LFD")
RSEM_summary <-data.frame(c(0,155,40,123,0,318),
                 row.names = c("pattern1.FDR0.05","pattern2.FDR0.05",
                               "pattern3.FDR0.05","pattern4.FDR0.05",
                               "pattern5.FDR0.05","Sum"))
names(RSEM_summary) <- c("number of genes")
knitr::kable(pattern_table)
HDF Lingon LFD
Pattern1 1 1 1
Pattern2 1 1 2
Pattern3 1 2 1
Pattern4 1 2 2
Pattern5 1 2 3
knitr::kable(RSEM_summary)
number of genes
pattern1.FDR0.05 0
pattern2.FDR0.05 155
pattern3.FDR0.05 40
pattern4.FDR0.05 123
pattern5.FDR0.05 0
Sum 318

Explanation:

ClusterProfiler

ClusterProfiler

ego <- function(pattern_data){
   ego_pattern <- enrichGO(gene = pattern_data[,1], # test gene list
                           universe = full_gene[,1], # background gene list
                           OrgDb = org.Mm.eg.db, 
                           keyType = "ENSEMBL", 
                           ont = 'ALL',       
                           pAdjustMethod = "BH",
                           pvalueCutoff  = 0.01,
                           qvalueCutoff  = 0.05,
                           readable      = TRUE)
   file_name <- paste("results/clusterprofiler/",
                      deparse(substitute(pattern_data)),
                      "_GO_enrich.csv", sep = "")
   write.table(as.data.frame(ego_pattern), file_name, row.names = F, sep = "\t")
   return(ego_pattern)
}

full_entrez <- bitr(full_gene[,1],
                    fromType = "ENSEMBL",
                    toType = "ENTREZID",
                   OrgDb = org.Mm.eg.db)
## 'select()' returned 1:many mapping between keys and columns
eKEGG <- function(pattern_data){
   pattern_entrez <- bitr(pattern_data[,1],
                    fromType = "ENSEMBL",
                    toType = c("ENTREZID"),
                    OrgDb = org.Mm.eg.db)
   eKEGG_pattern <- enrichKEGG(gene = pattern_entrez[,2], # test gene list
                           universe = full_entrez[,2], # background gene list
                           organism='mmu',
                           pvalueCutoff  = 0.01,
                           qvalueCutoff  = 0.05)
   eKEGG_pattern_genes <- setReadable(eKEGG_pattern,
                                      OrgDb = org.Mm.eg.db,
                                      keyType="ENTREZID")
   file_name <- paste("results/clusterprofiler/",
                      deparse(substitute(pattern_data)),
                      "_KEGG_enrich.csv", sep = "")
   write.table(as.data.frame(eKEGG_pattern_genes),
             file_name, row.names = F, sep = "\t")
   return(eKEGG_pattern_genes)
}

GO pattern2.FDR0.05 (LFD specific)

ego_pattern2.FDR0.05 <- ego(pattern2.FDR0.05)
ego_pattern2.FDR0.05_n  <- dim(ego_pattern2.FDR0.05)[1]
ego_pattern2.FDR0.05_n
## [1] 30

Gene counts and significance of enrichment:

barplot(ego_pattern2.FDR0.05, showCategory=ego_pattern2.FDR0.05_n)

Gene ratio, counts and significance of enrichment:

dotplot(ego_pattern2.FDR0.05, showCategory=ego_pattern2.FDR0.05_n)

Differentially expressed genes that belong enriched terms:

heatplot(ego_pattern2.FDR0.05)

GO pattern3.FDR0.05 (Lingon specific)

ego_pattern3.FDR0.05 <- ego(pattern3.FDR0.05)
ego_pattern3.FDR0.05_n  <- dim(ego_pattern3.FDR0.05)[1]
ego_pattern3.FDR0.05_n
## [1] 10

Gene counts and significance of enrichment:

barplot(ego_pattern3.FDR0.05,showCategory=ego_pattern3.FDR0.05_n,drop=T)

Gene ratio, counts and significance of enrichment:

dotplot(ego_pattern3.FDR0.05,showCategory=ego_pattern3.FDR0.05_n)

Differentially expressed genes that belong enriched terms:

heatplot(ego_pattern3.FDR0.05)

GO pattern4.FDR0.05 (HFD specific)

ego_pattern4.FDR0.05 <- ego(pattern4.FDR0.05)
ego_pattern4.FDR0.05_n  <- dim(ego_pattern4.FDR0.05)[1]
ego_pattern4.FDR0.05_n
## [1] 27

Gene counts and significance of enrichment:

barplot(ego_pattern4.FDR0.05,showCategory=ego_pattern4.FDR0.05_n,drop=T)

Gene ratio, counts and significance of enrichment:

dotplot(ego_pattern4.FDR0.05,showCategory=ego_pattern4.FDR0.05_n)

Differentially expressed genes that belong enriched terms:

heatplot(ego_pattern4.FDR0.05)

KEGG pattern2.FDR0.05 (LFD specific)

eKEGG_pattern2.FDR0.05 <- eKEGG(pattern2.FDR0.05)
knitr::kable(as.data.frame(eKEGG_pattern2.FDR0.05))
ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count
mmu04610 mmu04610 Complement and coagulation cascades 10/68 88/6987 0 1.6e-06 1.6e-06 Serpine1/Fga/Kng1/Fgg/Fgb/Serpina1e/Serpina1c/Plg/F2/F2r 10

KEGG pattern3.FDR0.05 (Lingon specific)

eKEGG_pattern3.FDR0.05 <- eKEGG(pattern3.FDR0.05)
knitr::kable(as.data.frame(eKEGG_pattern3.FDR0.05))
ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count

KEGG pattern4.FDR0.05 (HFD specific)

eKEGG_pattern4.FDR0.05 <- eKEGG(pattern4.FDR0.05)
knitr::kable(as.data.frame(eKEGG_pattern4.FDR0.05))
ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count
mmu04015 mmu04015 Rap1 signaling pathway 9/51 201/6987 1.20e-05 0.0022877 0.0016604 Prkd2/Mapk3/Calml3/Arap3/Fgfr4/Pik3r3/Adcy8/Cdh1/Gnai2 9
mmu04024 mmu04024 cAMP signaling pathway 8/51 201/6987 8.97e-05 0.0085247 0.0061869 Mapk3/Calml3/Arap3/Pde4b/Pik3r3/Ppp1r1b/Adcy8/Gnai2 8

gProfiler

gProfiler2

namesP <- c("term_id", "source", "p_value", 'term_size', 'intersection_size',
            "term_name","intersection")
ggo <- function(pattern_data){
   gost_pattern <- gost(pattern_data[,2],
                        domain_scope = "custom",
                        custom_bg = full_gene[,2],
                        sources = c('GO', 'KEGG', 'REAC', 'WP'),
                        organism ='mmusculus',
                        significant = TRUE,
                        evcodes=TRUE)
   file_name <- paste("results/gprofiler/",
                      deparse(substitute(pattern_data)),
                      "_gProfiler_enrich.csv", sep = "")
   write.table(as.data.frame(gost_pattern$result[,namesP]),
            file_name, row.names = F, sep = "\t")
   return(gost_pattern)
}

pattern2.FDR0.05 (LFD specific)

gost_pattern2 <- ggo(pattern2.FDR0.05)
gostplot(gost_pattern2)

pattern3.FDR0.05 (Lingon specific)

gost_pattern3 <- ggo(pattern3.FDR0.05)
gostplot(gost_pattern3)

pattern4.FDR0.05 (HFD specific)

gost_pattern4 <- ggo(pattern4.FDR0.05)
gostplot(gost_pattern4)

Results tables